In [1]:
import torch
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
import matplotlib.pyplot as plt
# Define a simple transform to convert images to tensors
transform = transforms.Compose([
#Automatically converts pixels to be between 0 and 1
transforms.ToTensor(),
])
# Download the CIFAR-10 datasets: train and validation (using test as validation)
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
val_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
print("Training set size:", len(train_dataset))
print("Validation set size:", len(val_dataset))
# Optionally, create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)
Files already downloaded and verified Files already downloaded and verified Training set size: 50000 Validation set size: 10000
In [2]:
# CIFAR-10 class names
class_names = train_dataset.classes
# Select one sample per class from the training dataset
samples = {}
for img, label in train_dataset:
if label not in samples:
samples[label] = img
if len(samples) == len(class_names):
break
# Create a 5 x 2 plot to display one image per class
fig, axes = plt.subplots(2, 5, figsize=(8, 4))
axes = axes.flatten()
for idx in range(10):
ax = axes[idx]
# Retrieve the image corresponding to the class index
img = samples[idx]
# Convert image from tensor (C x H x W) to numpy array (H x W x C)
img = img.permute(1, 2, 0)
ax.imshow(img)
ax.set_title(class_names[idx])
ax.axis('off')
plt.tight_layout()
plt.show()
In [3]:
# Find the label index for 'dog'
dog_label = class_names.index('dog')
# Collect 9 instances of dogs from the training dataset
dog_images = []
for img, label in train_dataset:
if label == dog_label:
dog_images.append(img)
if len(dog_images) == 9:
break
# Create a 3 x 3 plot to display the 9 dog images
fig, axes = plt.subplots(3, 3, figsize=(6, 6))
axes = axes.flatten()
for idx, img in enumerate(dog_images):
ax = axes[idx]
# Convert image from tensor (C x H x W) to numpy array (H x W x C)
img = img.permute(1, 2, 0)
ax.imshow(img)
ax.set_title('dog')
ax.axis('off')
plt.tight_layout()
plt.show()
In [4]:
# Find the label index for 'cat'
cat_label = class_names.index('cat')
# Collect 9 instances of cats from the training dataset
cat_images = []
for img, label in train_dataset:
if label == cat_label:
cat_images.append(img)
if len(cat_images) == 9:
break
# Create a 3 x 3 plot to display the 9 cat images
fig, axes = plt.subplots(3, 3, figsize=(6, 6))
axes = axes.flatten()
for idx, img in enumerate(cat_images):
ax = axes[idx]
# Convert image from tensor (C x H x W) to numpy array (H x W x C)
img = img.permute(1, 2, 0)
ax.imshow(img)
ax.set_title('cat')
ax.axis('off')
plt.tight_layout()
plt.show()
AlexNet with Batch Normalization¶
In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import pytorch_lightning as pl
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
class CIFAR10AlexAlike(pl.LightningModule):
def __init__(self):
super().__init__()
self.conv_layers = nn.Sequential(
nn.LazyConv2d(64, kernel_size=7, padding='same'),
nn.ReLU(),
nn.LazyBatchNorm2d(),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.LazyConv2d(128, kernel_size=5, padding='same'),
nn.ReLU(),
nn.LazyBatchNorm2d(),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.LazyConv2d(256, kernel_size=3, padding='same'),
nn.ReLU(),
nn.LazyBatchNorm2d(),
nn.MaxPool2d(kernel_size=2, stride=2)
)
self.fc_layers = nn.Sequential(
nn.Flatten(),
nn.LazyLinear(512),
nn.ReLU(),
nn.LazyBatchNorm1d(),
nn.LazyLinear(512),
nn.ReLU(),
nn.LazyBatchNorm1d(),
nn.LazyLinear(10) # Output layer for 10 classes
)
def forward(self, x):
x = self.conv_layers(x)
x = self.fc_layers(x)
return x
def training_step(self, batch, batch_idx):
x, y = batch
logits = self(x)
loss = F.cross_entropy(logits, y)
acc = (logits.argmax(dim=1) == y).float().mean()
# Log loss and accuracy at the epoch level
self.log('train_loss', loss, on_step = False, on_epoch=True, prog_bar=True)
self.log('train_acc', acc, on_step = False, on_epoch=True, prog_bar=True)
return loss
def validation_step(self, batch, batch_idx):
x, y = batch
logits = self(x)
loss = F.cross_entropy(logits, y)
acc = (logits.argmax(dim=1) == y).float().mean()
# Log loss and accuracy at the epoch level
self.log('val_loss', loss, on_step = False, on_epoch=True, prog_bar=True)
self.log('val_acc', acc, on_step = False, on_epoch=True, prog_bar=True)
def configure_optimizers(self):
return optim.Adam(self.parameters(), lr=0.001)
In [6]:
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import CSVLogger
csv_logger = CSVLogger(save_dir='logs/', name='AlexNetAlike', version = "")
early_stop_callback = EarlyStopping(monitor='val_loss', patience=10, verbose=True, mode = "min")
model = CIFAR10AlexAlike()
# Create a Trainer and pass the CSV logger and callbacks (early stopping and rich progress bar)
trainer = pl.Trainer(
max_epochs=25,
logger=csv_logger,
callbacks=[early_stop_callback]
)
trainer.fit(model, train_loader, val_loader)
# Save the final model state to the log directory using Lightning
trainer.save_checkpoint('logs/AlexNetAlike/final_model.ckpt')
/home/kmcalist/.local/lib/python3.10/site-packages/torch/nn/modules/lazy.py:180: UserWarning: Lazy modules are a new feature under heavy development so changes to the API or functionality can happen at any moment.
warnings.warn('Lazy modules are a new feature under heavy development '
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
/home/kmcalist/.local/lib/python3.10/site-packages/lightning_fabric/loggers/csv_logs.py:269: Experiment logs directory logs/AlexNetAlike/ exists and is not empty. Previous log files in this directory will be deleted when the new ones are saved!
/home/kmcalist/.local/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:652: Checkpoint directory logs/AlexNetAlike/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/kmcalist/.local/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:461: The total number of parameters detected may be inaccurate because the model contains an instance of `UninitializedParameter`. To get an accurate number, set `self.example_input_array` in your LightningModule.
| Name | Type | Params | Mode
---------------------------------------------------
0 | conv_layers | Sequential | 0 | train
1 | fc_layers | Sequential | 0 | train
---------------------------------------------------
0 Trainable params
0 Non-trainable params
0 Total params
0.000 Total estimated model params size (MB)
Sanity Checking: | | 0/? [00:00<?, ?it/s]
/home/kmcalist/.local/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.) return F.conv2d(input, weight, bias, self.stride,
Training: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved. New best score: 1.632
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.225 >= min_delta = 0.0. New best score: 1.407
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.399 >= min_delta = 0.0. New best score: 1.008
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.081 >= min_delta = 0.0. New best score: 0.928
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.029 >= min_delta = 0.0. New best score: 0.898
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.056 >= min_delta = 0.0. New best score: 0.843
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Monitored metric val_loss did not improve in the last 10 records. Best score: 0.843. Signaling Trainer to stop.
In [6]:
# Load the model checkpoint
model = CIFAR10AlexAlike.load_from_checkpoint('logs/AlexNetAlike/final_model.ckpt')
from torchsummary import summary
# Move the model to the appropriate device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Print the summary of the model
summary(model, input_size=(3, 32, 32))
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 32, 32] 9,472
ReLU-2 [-1, 64, 32, 32] 0
BatchNorm2d-3 [-1, 64, 32, 32] 128
MaxPool2d-4 [-1, 64, 16, 16] 0
Conv2d-5 [-1, 128, 16, 16] 204,928
ReLU-6 [-1, 128, 16, 16] 0
BatchNorm2d-7 [-1, 128, 16, 16] 256
MaxPool2d-8 [-1, 128, 8, 8] 0
Conv2d-9 [-1, 256, 8, 8] 295,168
ReLU-10 [-1, 256, 8, 8] 0
BatchNorm2d-11 [-1, 256, 8, 8] 512
MaxPool2d-12 [-1, 256, 4, 4] 0
Flatten-13 [-1, 4096] 0
Linear-14 [-1, 512] 2,097,664
ReLU-15 [-1, 512] 0
BatchNorm1d-16 [-1, 512] 1,024
Linear-17 [-1, 512] 262,656
ReLU-18 [-1, 512] 0
BatchNorm1d-19 [-1, 512] 1,024
Linear-20 [-1, 10] 5,130
================================================================
Total params: 2,877,962
Trainable params: 2,877,962
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 2.90
Params size (MB): 10.98
Estimated Total Size (MB): 13.89
----------------------------------------------------------------
/home/kmcalist/.local/lib/python3.10/site-packages/torch/nn/modules/lazy.py:180: UserWarning: Lazy modules are a new feature under heavy development so changes to the API or functionality can happen at any moment.
warnings.warn('Lazy modules are a new feature under heavy development '
/home/kmcalist/.local/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)
return F.conv2d(input, weight, bias, self.stride,
In [7]:
import pandas as pd
def process_csv_logger(csv_path: str) -> pd.DataFrame:
"""
Reads a CSV logger file and processes it into a rectangular DataFrame with one row per epoch.
The CSV must contain at least the columns "epoch" and "step", along with other metric columns.
For each (epoch, metric) combination, if there are multiple entries, the row with the highest step is kept.
Parameters:
csv_path (str): The path to the CSV file.
Returns:
pd.DataFrame: A wide DataFrame with one row per epoch and columns for each metric.
"""
# Read the CSV file into a DataFrame.
df = pd.read_csv(csv_path)
# Melt the DataFrame into long format.
# Each row will have: epoch, step, metric (column name), and its corresponding value.
long_df = df.melt(id_vars=['epoch', 'step'], var_name='metric', value_name='value')
# Drop rows where value is missing.
long_df = long_df.dropna(subset=['value'])
# For each epoch and metric, keep the row with the largest step.
idx = long_df.groupby(['epoch', 'metric'])['step'].idxmax()
long_df_max = long_df.loc[idx]
# Pivot the long DataFrame back to wide format: one row per epoch, columns for each metric.
wide_df = long_df_max.pivot(index='epoch', columns='metric', values='value').reset_index()
return wide_df
# Read the metrics.csv file
alexlike_df = process_csv_logger('logs/AlexNetAlike/metrics.csv')
# Plot training and validation loss against epoch
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(alexlike_df['epoch'], alexlike_df['train_loss'], label='Training Loss')
plt.plot(alexlike_df['epoch'], alexlike_df['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
# Plot training and validation accuracy against epoch
plt.subplot(1, 2, 2)
plt.plot(alexlike_df['epoch'], alexlike_df['train_acc'], label='Training Accuracy')
plt.plot(alexlike_df['epoch'], alexlike_df['val_acc'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()
plt.tight_layout()
plt.show()
Activation Maximization¶
In [8]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torchvision.utils import make_grid
def activation_maximization(model, layer_name, num_filters=64, img_size=(32, 32), iterations=30, lr=0.1):
"""
Generates images that maximally activate each filter in a given layer.
Args:
model: A PyTorch Lightning model.
layer_name: The name of the layer to extract activations from.
num_filters: Number of filters in the layer (default: 64).
img_size: The size of the input image (default: (32, 32) for CIFAR-10).
iterations: Number of optimization steps.
lr: Learning rate for gradient ascent.
Returns:
A tensor containing maximized images for all filters.
"""
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device) # Move model to GPU if available
model.eval() # Set model to evaluation mode
# Hook function to store activations
activations = None
def hook_fn(module, input, output):
nonlocal activations
activations = output
# Attach hook to the specified layer
layer = dict(model.named_modules())[layer_name]
hook = layer.register_forward_hook(hook_fn)
# Create a batch of random images (one for each filter)
input_imgs = torch.randn((num_filters, 3, img_size[0], img_size[1]), requires_grad=True, device=device)
# Define optimizer
optimizer = torch.optim.Adam([input_imgs], lr=lr)
for _ in range(iterations):
optimizer.zero_grad()
model(input_imgs) # Forward pass (activations are stored in hook)
loss = -activations[:, range(num_filters)].mean(dim=[1, 2, 3]).sum() # Maximize each filter's activation
loss.backward()
optimizer.step()
# Regularization: Keep pixel values in range [0,1]
with torch.no_grad():
input_imgs.clamp_(0, 1)
hook.remove() # Remove the hook after visualization
return input_imgs.detach().cpu() # Move to CPU for visualization
In [9]:
# Name of the layer after the first MaxPool (extract from model)
layer_name = "conv_layers.3" # The first MaxPool layer in the model
# Generate activation-maximized images
maximized_imgs = activation_maximization(model, layer_name, num_filters=64, img_size=(32, 32), iterations = 2500, lr = .1)
# Convert images to a grid using torchvision.utils.make_grid
grid = make_grid(maximized_imgs, nrow=8, padding=2, normalize=True)
# Display the grid
plt.figure(figsize=(15, 15))
plt.imshow(grid.cpu().permute(1, 2, 0)) # Convert (C, H, W) to (H, W, C) for visualization
plt.axis("off")
plt.title(f"Activation Maximization for 64 Filters in {layer_name}")
plt.show()
In [10]:
# Initialize the model and move to GPU if available
#model = CIFAR10AlexAlike().to("cuda" if torch.cuda.is_available() else "cpu")
# Name of the layer after the 2nd MaxPool (extract from model)
layer_name = "conv_layers.7"
# Generate activation-maximized images
maximized_imgs = activation_maximization(model, layer_name, num_filters=128, img_size=(32, 32), iterations = 2500)
# Convert images to a grid using torchvision.utils.make_grid
grid = make_grid(maximized_imgs, nrow=16, padding=2, normalize=True)
# Display the grid
plt.figure(figsize=(15, 15))
plt.imshow(grid.cpu().permute(1, 2, 0)) # Convert (C, H, W) to (H, W, C) for visualization
plt.axis("off")
plt.title(f"Activation Maximization for 128 Filters in {layer_name}")
plt.show()
In [14]:
# Initialize the model and move to GPU if available
#model = CIFAR10AlexAlike().to("cuda" if torch.cuda.is_available() else "cpu")
# Name of the layer after the 2nd MaxPool (extract from model)
layer_name = "conv_layers.11"
# Generate activation-maximized images
maximized_imgs = activation_maximization(model, layer_name, num_filters=256, img_size=(32, 32), iterations = 2500)
# Convert images to a grid using torchvision.utils.make_grid
grid = make_grid(maximized_imgs, nrow=16, padding=2, normalize=True)
# Display the grid
plt.figure(figsize=(15, 15))
plt.imshow(grid.cpu().permute(1, 2, 0)) # Convert (C, H, W) to (H, W, C) for visualization
plt.axis("off")
plt.title(f"Activation Maximization for 256 Filters in {layer_name}")
plt.show()
Find Exemplars¶
In [12]:
import torch
import matplotlib.pyplot as plt
import numpy as np
def plot_top_images_for_filters(model, val_loader, target_layer, filter_indices, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
"""
Finds and plots the top three images from val_loader that produce the highest average activation for each filter in filter_indices
at the target_layer of the model.
Parameters:
- model: the trained LightningModule.
- val_loader: validation DataLoader.
- target_layer: the layer (e.g. one of model.conv_layers[...] objects) on which to register the hook.
- filter_indices: list of integer indices for the filters in the target_layer.
- device: device to run inference on.
"""
model.to(device)
model.eval()
# Dictionary to store lists of (activation_value, image) for each filter index.
filter_results = {f_idx: [] for f_idx in filter_indices}
# This dictionary will temporarily store the activation for each forward pass.
activations = {}
# Define hook function to capture the output of target_layer.
def hook_fn(module, input, output):
# Save the activation (detach and move to CPU for further processing)
activations['output'] = output.detach().cpu()
# Register the forward hook on the target layer.
hook_handle = target_layer.register_forward_hook(hook_fn)
with torch.no_grad():
for batch in val_loader:
# Assuming batch is a tuple (images, labels)
images, _ = batch
images = images.to(device)
# Forward pass: our hook will capture the target layer's output.
_ = model(images)
# Retrieve the activations captured by the hook.
# Expecting shape: [B, C, H, W]
act = activations['output'] # tensor on CPU
# For each image in the batch compute the average activation for the spatial dimensions.
# We assume that act is of shape [B, num_filters, H, W].
# Compute mean activation over H and W for each image.
# Resulting in shape: [B, num_filters]
mean_acts = act.mean(dim=[2, 3])
# Loop through each filter index we care about.
for f_idx in filter_indices:
# Ensure that the filter index is within the valid range.
if f_idx >= mean_acts.shape[1]:
continue
# For each image in the batch, store the mean activation for filter f_idx along with the image.
for i in range(mean_acts.shape[0]):
activation_value = mean_acts[i, f_idx].item()
# Make a copy of the image tensor and bring to CPU for plotting.
# Assuming image is in shape [C, H, W].
img = images[i].cpu()
filter_results[f_idx].append((activation_value, img))
# Remove the hook
hook_handle.remove()
# Now, for each filter, sort the images by activation value and pick the top 3.
top_images = {}
for f_idx in filter_indices:
# Sort descending by activation value.
sorted_list = sorted(filter_results[f_idx], key=lambda x: x[0], reverse=True)
# Take the top 3 entries.
top_images[f_idx] = sorted_list[:3]
# Plot the results.
num_filters = len(filter_indices)
fig, axs = plt.subplots(num_filters, 3, figsize=(12, 4 * num_filters))
# If there is only one filter, axs might be one-dimensional.
if num_filters == 1:
axs = np.expand_dims(axs, axis=0)
for row_idx, f_idx in enumerate(filter_indices):
for col_idx in range(3):
ax = axs[row_idx, col_idx]
# Check if we have enough images
if col_idx < len(top_images[f_idx]):
activation_value, img_tensor = top_images[f_idx][col_idx]
# If the images were normalized, unnormalize here.
# For example, if using mean=[0.5, 0.5, 0.5] and std=[0.5, 0.5, 0.5]:
# img_tensor = img_tensor * 0.5 + 0.5
# Convert tensor (C, H, W) to numpy array (H, W, C)
img = img_tensor.permute(1, 2, 0).numpy()
ax.imshow(np.clip(img, 0, 1))
ax.set_title(f'Filter {f_idx}\nActivation: {activation_value:.3f}')
else:
ax.axis('off')
axs[row_idx, 0].set_ylabel(f'Filter {f_idx}', fontsize=14)
plt.tight_layout()
plt.show()
return fig
# Example usage:
# Assume we want to inspect filter indices [0, 10, 20] in the first convolutional layer of conv_layers.
# For instance, if you want the hook on the first LazyConv2d layer:
# target_layer = model.conv_layers[0]
# plot_top_images_for_filters(model, val_loader, target_layer, filter_indices=[0, 10, 20])
In [13]:
# Choose the convolutional layer immediately after the first maxpool.
target_layer = model.conv_layers[3] # This is the LazyConv2d with 128 channels.
# Define a list of filter indices for the first 64 filters.
filter_indices = list(range(10))
# Call the function to plot the top three images for the specified filters.
plot_top_images_for_filters(model, val_loader, target_layer, filter_indices)
Out[13]:
In [14]:
# Choose the convolutional layer immediately after the first maxpool.
target_layer = model.conv_layers[7] # This is the LazyConv2d with 128 channels.
# Define a list of filter indices for the first 64 filters.
filter_indices = list(range(10))
# Call the function to plot the top three images for the specified filters.
plot_top_images_for_filters(model, val_loader, target_layer, filter_indices)
Out[14]:
In [15]:
# Choose the convolutional layer immediately after the first maxpool.
target_layer = model.conv_layers[11] # This is the LazyConv2d with 128 channels.
# Define a list of filter indices for the first 64 filters.
filter_indices = list(range(10))
# Call the function to plot the top three images for the specified filters.
plot_top_images_for_filters(model, val_loader, target_layer, filter_indices)
Out[15]:
VGG Model¶
In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
import torch.optim as optim
class VGGStyle(pl.LightningModule):
def __init__(self, num_classes=10):
super().__init__()
# Convolutional feature extractor using Lazy layers and same padding
self.features = nn.Sequential(
# Block 1
nn.LazyConv2d(64, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(64, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.MaxPool2d(kernel_size=2, stride=2),
# Block 2
nn.LazyConv2d(128, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(128, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.MaxPool2d(kernel_size=2, stride=2),
# Block 3
nn.LazyConv2d(256, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(256, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(256, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.MaxPool2d(kernel_size=2, stride=2),
# Block 4
nn.LazyConv2d(512, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(512, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(512, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.MaxPool2d(kernel_size=2, stride=2),
)
# Classifier using lazy layers; note that the input dimension for the first Linear is inferred.
self.classifier = nn.Sequential(
nn.Flatten(),
nn.LazyLinear(512),
nn.ReLU(inplace=True),
nn.LazyBatchNorm1d(),
nn.LazyLinear(512),
nn.ReLU(inplace=True),
nn.LazyBatchNorm1d(),
nn.LazyLinear(num_classes)
)
def forward(self, x):
x = self.features(x)
x = self.classifier(x)
return x
def training_step(self, batch, batch_idx):
x, y = batch
logits = self(x)
loss = F.cross_entropy(logits, y)
acc = (logits.argmax(dim=1) == y).float().mean()
self.log('train_loss', loss, prog_bar=True, on_step=False, on_epoch=True)
self.log('train_acc', acc, prog_bar=True, on_step=False, on_epoch=True)
return loss
def validation_step(self, batch, batch_idx):
x, y = batch
logits = self(x)
loss = F.cross_entropy(logits, y)
acc = (logits.argmax(dim=1) == y).float().mean()
self.log('val_loss', loss, prog_bar=True, on_step=False, on_epoch=True)
self.log('val_acc', acc, prog_bar=True, on_step=False, on_epoch=True)
def configure_optimizers(self):
return optim.Adam(self.parameters(), lr=0.001)
In [17]:
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import CSVLogger
csv_logger = CSVLogger(save_dir='logs/', name='VGGStyle', version = "")
early_stop_callback = EarlyStopping(monitor='val_loss', patience=10, verbose=True, mode = "min")
model = VGGStyle()
# Create a Trainer and pass the CSV logger and callbacks (early stopping and rich progress bar)
trainer = pl.Trainer(
max_epochs=25,
logger=csv_logger,
callbacks=[early_stop_callback]
)
trainer.fit(model, train_loader, val_loader)
# Save the final model state to the log directory using Lightning
trainer.save_checkpoint('logs/VGGStyle/final_model.ckpt')
/home/kmcalist/.local/lib/python3.10/site-packages/torch/nn/modules/lazy.py:180: UserWarning: Lazy modules are a new feature under heavy development so changes to the API or functionality can happen at any moment.
warnings.warn('Lazy modules are a new feature under heavy development '
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
/home/kmcalist/.local/lib/python3.10/site-packages/lightning_fabric/loggers/csv_logs.py:269: Experiment logs directory logs/VGGStyle/ exists and is not empty. Previous log files in this directory will be deleted when the new ones are saved!
/home/kmcalist/.local/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:652: Checkpoint directory logs/VGGStyle/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/kmcalist/.local/lib/python3.10/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:461: The total number of parameters detected may be inaccurate because the model contains an instance of `UninitializedParameter`. To get an accurate number, set `self.example_input_array` in your LightningModule. | Name | Type | Params | Mode -------------------------------------------------- 0 | features | Sequential | 0 | train 1 | classifier | Sequential | 0 | train -------------------------------------------------- 0 Trainable params 0 Non-trainable params 0 Total params 0.000 Total estimated model params size (MB)
Sanity Checking: | | 0/? [00:00<?, ?it/s]
Training: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved. New best score: 1.027
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.096 >= min_delta = 0.0. New best score: 0.931
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.254 >= min_delta = 0.0. New best score: 0.677
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.084 >= min_delta = 0.0. New best score: 0.593
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.097 >= min_delta = 0.0. New best score: 0.496
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.495
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Monitored metric val_loss did not improve in the last 10 records. Best score: 0.495. Signaling Trainer to stop.
In [18]:
# Load the model checkpoint
model = VGGStyle.load_from_checkpoint('logs/VGGStyle/final_model.ckpt')
from torchsummary import summary
# Move the model to the appropriate device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Print the summary of the model
summary(model, input_size=(3, 32, 32))
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 32, 32] 1,792
ReLU-2 [-1, 64, 32, 32] 0
BatchNorm2d-3 [-1, 64, 32, 32] 128
Conv2d-4 [-1, 64, 32, 32] 36,928
ReLU-5 [-1, 64, 32, 32] 0
BatchNorm2d-6 [-1, 64, 32, 32] 128
MaxPool2d-7 [-1, 64, 16, 16] 0
Conv2d-8 [-1, 128, 16, 16] 73,856
ReLU-9 [-1, 128, 16, 16] 0
BatchNorm2d-10 [-1, 128, 16, 16] 256
Conv2d-11 [-1, 128, 16, 16] 147,584
ReLU-12 [-1, 128, 16, 16] 0
BatchNorm2d-13 [-1, 128, 16, 16] 256
MaxPool2d-14 [-1, 128, 8, 8] 0
Conv2d-15 [-1, 256, 8, 8] 295,168
ReLU-16 [-1, 256, 8, 8] 0
BatchNorm2d-17 [-1, 256, 8, 8] 512
Conv2d-18 [-1, 256, 8, 8] 590,080
ReLU-19 [-1, 256, 8, 8] 0
BatchNorm2d-20 [-1, 256, 8, 8] 512
Conv2d-21 [-1, 256, 8, 8] 590,080
ReLU-22 [-1, 256, 8, 8] 0
BatchNorm2d-23 [-1, 256, 8, 8] 512
MaxPool2d-24 [-1, 256, 4, 4] 0
Conv2d-25 [-1, 512, 4, 4] 1,180,160
ReLU-26 [-1, 512, 4, 4] 0
BatchNorm2d-27 [-1, 512, 4, 4] 1,024
Conv2d-28 [-1, 512, 4, 4] 2,359,808
ReLU-29 [-1, 512, 4, 4] 0
BatchNorm2d-30 [-1, 512, 4, 4] 1,024
Conv2d-31 [-1, 512, 4, 4] 2,359,808
ReLU-32 [-1, 512, 4, 4] 0
BatchNorm2d-33 [-1, 512, 4, 4] 1,024
MaxPool2d-34 [-1, 512, 2, 2] 0
Flatten-35 [-1, 2048] 0
Linear-36 [-1, 512] 1,049,088
ReLU-37 [-1, 512] 0
BatchNorm1d-38 [-1, 512] 1,024
Linear-39 [-1, 512] 262,656
ReLU-40 [-1, 512] 0
BatchNorm1d-41 [-1, 512] 1,024
Linear-42 [-1, 10] 5,130
================================================================
Total params: 8,959,562
Trainable params: 8,959,562
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 6.46
Params size (MB): 34.18
Estimated Total Size (MB): 40.65
----------------------------------------------------------------
/home/kmcalist/.local/lib/python3.10/site-packages/torch/nn/modules/lazy.py:180: UserWarning: Lazy modules are a new feature under heavy development so changes to the API or functionality can happen at any moment.
warnings.warn('Lazy modules are a new feature under heavy development '
In [19]:
# Read the metrics.csv file for the VGGStyle model
vggstyle_df = process_csv_logger('logs/VGGStyle/metrics.csv')
# Plot training and validation loss against epoch
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(vggstyle_df['epoch'], vggstyle_df['train_loss'], label='Training Loss')
plt.plot(vggstyle_df['epoch'], vggstyle_df['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss for VGGStyle')
plt.legend()
# Plot training and validation accuracy against epoch
plt.subplot(1, 2, 2)
plt.plot(vggstyle_df['epoch'], vggstyle_df['train_acc'], label='Training Accuracy')
plt.plot(vggstyle_df['epoch'], vggstyle_df['val_acc'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy for VGGStyle')
plt.legend()
plt.tight_layout()
plt.show()
In [20]:
import pandas as pd
import matplotlib.pyplot as plt
# Align the epochs and add NAs to the shorter one
max_epochs = max(alexlike_df['epoch'].max(), vggstyle_df['epoch'].max())
# Reindex both dataframes to have the same number of epochs
alexlike_df_reindexed = alexlike_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
vggstyle_df_reindexed = vggstyle_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
# Plot the validation accuracy curves
plt.figure(figsize=(10, 5))
plt.plot(alexlike_df_reindexed['epoch'], alexlike_df_reindexed['val_acc'], label='AlexLike Validation Accuracy')
plt.plot(vggstyle_df_reindexed['epoch'], vggstyle_df_reindexed['val_acc'], label='VGGStyle Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Validation Accuracy')
plt.title('Validation Accuracy Comparison')
plt.legend()
plt.show()
VGG with Dropout¶
In [21]:
import torch
import pytorch_lightning as pl
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class VGGStyleWithDropout(pl.LightningModule):
def __init__(self, num_classes=10, dropout_rate=0.5):
super().__init__()
# Convolutional feature extractor using Lazy layers and same padding
self.features = nn.Sequential(
# Block 1
nn.LazyConv2d(64, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(64, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Dropout(dropout_rate),
# Block 2
nn.LazyConv2d(128, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(128, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Dropout(dropout_rate),
# Block 3
nn.LazyConv2d(256, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(256, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(256, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Dropout(dropout_rate),
# Block 4
nn.LazyConv2d(512, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(512, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(512, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Dropout(dropout_rate),
)
# Classifier using lazy layers; note that the input dimension for the first Linear is inferred.
self.classifier = nn.Sequential(
nn.Flatten(),
nn.LazyLinear(512),
nn.ReLU(inplace=True),
nn.LazyBatchNorm1d(),
nn.Dropout(dropout_rate),
nn.LazyLinear(512),
nn.ReLU(inplace=True),
nn.LazyBatchNorm1d(),
nn.Dropout(dropout_rate),
nn.LazyLinear(num_classes)
)
def forward(self, x):
x = self.features(x)
x = self.classifier(x)
return x
def training_step(self, batch, batch_idx):
x, y = batch
logits = self(x)
loss = F.cross_entropy(logits, y)
acc = (logits.argmax(dim=1) == y).float().mean()
self.log('train_loss', loss, prog_bar=True, on_step=False, on_epoch=True)
self.log('train_acc', acc, prog_bar=True, on_step=False, on_epoch=True)
return loss
def validation_step(self, batch, batch_idx):
x, y = batch
logits = self(x)
loss = F.cross_entropy(logits, y)
acc = (logits.argmax(dim=1) == y).float().mean()
self.log('val_loss', loss, prog_bar=True, on_step=False, on_epoch=True)
self.log('val_acc', acc, prog_bar=True, on_step=False, on_epoch=True)
def configure_optimizers(self):
return optim.Adam(self.parameters(), lr=0.001)
# Training the modified VGG model with dropout
csv_logger = CSVLogger(save_dir='logs/', name='VGGStyleWithDropout', version = "")
early_stop_callback = EarlyStopping(monitor='val_loss', patience=10, verbose=True, mode = "min")
model = VGGStyleWithDropout()
# Create a Trainer and pass the CSV logger and callbacks (early stopping and rich progress bar)
trainer = pl.Trainer(
max_epochs=25,
logger=csv_logger,
callbacks=[early_stop_callback]
)
trainer.fit(model, train_loader, val_loader)
# Save the final model state to the log directory using Lightning
trainer.save_checkpoint('logs/VGGStyleWithDropout/final_model.ckpt')
/home/kmcalist/.local/lib/python3.10/site-packages/torch/nn/modules/lazy.py:180: UserWarning: Lazy modules are a new feature under heavy development so changes to the API or functionality can happen at any moment.
warnings.warn('Lazy modules are a new feature under heavy development '
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
| Name | Type | Params | Mode
--------------------------------------------------
0 | features | Sequential | 0 | train
1 | classifier | Sequential | 0 | train
--------------------------------------------------
0 Trainable params
0 Non-trainable params
0 Total params
0.000 Total estimated model params size (MB)
Sanity Checking: | | 0/? [00:00<?, ?it/s]
Training: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved. New best score: 1.227
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.304 >= min_delta = 0.0. New best score: 0.923
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.093 >= min_delta = 0.0. New best score: 0.830
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.147 >= min_delta = 0.0. New best score: 0.683
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.034 >= min_delta = 0.0. New best score: 0.649
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.049 >= min_delta = 0.0. New best score: 0.600
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.036 >= min_delta = 0.0. New best score: 0.564
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.020 >= min_delta = 0.0. New best score: 0.544
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.061 >= min_delta = 0.0. New best score: 0.483
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 0.476
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 0.462
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 0.457
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.041 >= min_delta = 0.0. New best score: 0.416
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Monitored metric val_loss did not improve in the last 10 records. Best score: 0.416. Signaling Trainer to stop.
In [22]:
from torchsummary import summary
# Load the model checkpoint
model = VGGStyleWithDropout.load_from_checkpoint('logs/VGGStyleWithDropout/final_model.ckpt')
# Move the model to the appropriate device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Print the summary of the model
summary(model, input_size=(3, 32, 32))
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 32, 32] 1,792
ReLU-2 [-1, 64, 32, 32] 0
BatchNorm2d-3 [-1, 64, 32, 32] 128
Conv2d-4 [-1, 64, 32, 32] 36,928
ReLU-5 [-1, 64, 32, 32] 0
BatchNorm2d-6 [-1, 64, 32, 32] 128
MaxPool2d-7 [-1, 64, 16, 16] 0
Dropout-8 [-1, 64, 16, 16] 0
Conv2d-9 [-1, 128, 16, 16] 73,856
ReLU-10 [-1, 128, 16, 16] 0
BatchNorm2d-11 [-1, 128, 16, 16] 256
Conv2d-12 [-1, 128, 16, 16] 147,584
ReLU-13 [-1, 128, 16, 16] 0
BatchNorm2d-14 [-1, 128, 16, 16] 256
MaxPool2d-15 [-1, 128, 8, 8] 0
Dropout-16 [-1, 128, 8, 8] 0
Conv2d-17 [-1, 256, 8, 8] 295,168
ReLU-18 [-1, 256, 8, 8] 0
BatchNorm2d-19 [-1, 256, 8, 8] 512
Conv2d-20 [-1, 256, 8, 8] 590,080
ReLU-21 [-1, 256, 8, 8] 0
BatchNorm2d-22 [-1, 256, 8, 8] 512
Conv2d-23 [-1, 256, 8, 8] 590,080
ReLU-24 [-1, 256, 8, 8] 0
BatchNorm2d-25 [-1, 256, 8, 8] 512
MaxPool2d-26 [-1, 256, 4, 4] 0
Dropout-27 [-1, 256, 4, 4] 0
Conv2d-28 [-1, 512, 4, 4] 1,180,160
ReLU-29 [-1, 512, 4, 4] 0
BatchNorm2d-30 [-1, 512, 4, 4] 1,024
Conv2d-31 [-1, 512, 4, 4] 2,359,808
ReLU-32 [-1, 512, 4, 4] 0
BatchNorm2d-33 [-1, 512, 4, 4] 1,024
Conv2d-34 [-1, 512, 4, 4] 2,359,808
ReLU-35 [-1, 512, 4, 4] 0
BatchNorm2d-36 [-1, 512, 4, 4] 1,024
MaxPool2d-37 [-1, 512, 2, 2] 0
Dropout-38 [-1, 512, 2, 2] 0
Flatten-39 [-1, 2048] 0
Linear-40 [-1, 512] 1,049,088
ReLU-41 [-1, 512] 0
BatchNorm1d-42 [-1, 512] 1,024
Dropout-43 [-1, 512] 0
Linear-44 [-1, 512] 262,656
ReLU-45 [-1, 512] 0
BatchNorm1d-46 [-1, 512] 1,024
Dropout-47 [-1, 512] 0
Linear-48 [-1, 10] 5,130
================================================================
Total params: 8,959,562
Trainable params: 8,959,562
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 6.70
Params size (MB): 34.18
Estimated Total Size (MB): 40.89
----------------------------------------------------------------
/home/kmcalist/.local/lib/python3.10/site-packages/torch/nn/modules/lazy.py:180: UserWarning: Lazy modules are a new feature under heavy development so changes to the API or functionality can happen at any moment.
warnings.warn('Lazy modules are a new feature under heavy development '
In [23]:
# Read the metrics.csv file for the VGGStyleWithDropout model
vggstyle_dropout_df = process_csv_logger('logs/VGGStyleWithDropout/metrics.csv')
# Plot training and validation loss and accuracy for VGGStyleWithDropout model
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(vggstyle_dropout_df['epoch'], vggstyle_dropout_df['train_loss'], label='Training Loss')
plt.plot(vggstyle_dropout_df['epoch'], vggstyle_dropout_df['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('VGGStyleWithDropout Training and Validation Loss')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(vggstyle_dropout_df['epoch'], vggstyle_dropout_df['train_acc'], label='Training Accuracy')
plt.plot(vggstyle_dropout_df['epoch'], vggstyle_dropout_df['val_acc'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('VGGStyleWithDropout Training and Validation Accuracy')
plt.legend()
plt.tight_layout()
plt.show()
In [24]:
# Align the epochs and add NAs to the shorter one
max_epochs = max(alexlike_df['epoch'].max(), vggstyle_df['epoch'].max(), vggstyle_dropout_df['epoch'].max())
# Reindex all dataframes to have the same number of epochs
alexlike_df_reindexed = alexlike_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
vggstyle_df_reindexed = vggstyle_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
vggstyle_dropout_df_reindexed = vggstyle_dropout_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
# Plot the validation accuracy curves
plt.figure(figsize=(10, 5))
plt.plot(alexlike_df_reindexed['epoch'], alexlike_df_reindexed['val_acc'], label='AlexLike Validation Accuracy')
plt.plot(vggstyle_df_reindexed['epoch'], vggstyle_df_reindexed['val_acc'], label='VGGStyle Validation Accuracy')
plt.plot(vggstyle_dropout_df_reindexed['epoch'], vggstyle_dropout_df_reindexed['val_acc'], label='VGGStyleWithDropout Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Validation Accuracy')
plt.title('Validation Accuracy Comparison')
plt.legend()
plt.show()
Data Augmentation¶
In [26]:
import torch
import torchvision
from torchvision import transforms
import matplotlib.pyplot as plt
# Download CIFAR-10 training dataset (if not already downloaded)
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True)
# Get the index for 'cat' (CIFAR-10 classes: 'airplane', 'automobile', 'bird', 'cat', ...)
cat_idx = train_dataset.class_to_idx['cat']
# Select the first cat image from the training set
cat_img = None
for img, label in train_dataset:
if label == cat_idx:
cat_img = img # This is a PIL Image
break
if cat_img is None:
raise ValueError("No cat image found in the dataset.")
# Define a data augmentation pipeline for demonstration
augmentation_transforms = transforms.Compose([
#transforms.RandomResizedCrop(size=32, scale=(0.8, 1.0)),
transforms.RandomHorizontalFlip(),
#transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5),
transforms.RandomRotation(degrees=15),
transforms.ToTensor() # Convert augmented PIL Image to tensor for visualization
])
# Apply augmentations multiple times to the same image
augmented_images = []
num_examples = 8 # How many augmented versions to show
for _ in range(num_examples):
augmented = augmentation_transforms(cat_img)
augmented_images.append(augmented)
# Plot the augmented images in a 2 x 4 grid
fig, axes = plt.subplots(2, 4, figsize=(12, 6))
axes = axes.flatten()
for i, ax in enumerate(axes):
# Convert tensor (C x H x W) to numpy array (H x W x C)
img_np = augmented_images[i].permute(1, 2, 0).numpy()
ax.imshow(img_np)
ax.axis("off")
plt.suptitle("Data Augmentation Examples for a Cat Instance")
plt.tight_layout()
plt.show()
Files already downloaded and verified
In [27]:
import torch
import torchvision
from torchvision import transforms
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import CSVLogger
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
#Note that torchvision will handle this process
#Will do this jittering after a batch is loaded from the DataLoader
# Define data augmentation transforms
augmentation_transforms = transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.RandomRotation(degrees=15),
transforms.ToTensor()
])
# Apply data augmentation to the training dataset
train_dataset_augmented = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=augmentation_transforms)
# Create data loaders
train_loader_augmented = torch.utils.data.DataLoader(train_dataset_augmented, batch_size=64, shuffle=True, num_workers=2)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)
# Define the VGG model without dropout
class VGGStyleAugmentation(pl.LightningModule):
def __init__(self, num_classes=10):
super().__init__()
# Convolutional feature extractor using Lazy layers and same padding
self.features = nn.Sequential(
# Block 1
nn.LazyConv2d(64, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(64, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.MaxPool2d(kernel_size=2, stride=2),
# Block 2
nn.LazyConv2d(128, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(128, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.MaxPool2d(kernel_size=2, stride=2),
# Block 3
nn.LazyConv2d(256, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(256, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(256, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.MaxPool2d(kernel_size=2, stride=2),
# Block 4
nn.LazyConv2d(512, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(512, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.LazyConv2d(512, kernel_size=3, padding='same'),
nn.ReLU(inplace=True),
nn.LazyBatchNorm2d(),
nn.MaxPool2d(kernel_size=2, stride=2),
)
# Classifier using lazy layers; note that the input dimension for the first Linear is inferred.
self.classifier = nn.Sequential(
nn.Flatten(),
nn.LazyLinear(512),
nn.ReLU(inplace=True),
nn.LazyBatchNorm1d(),
nn.LazyLinear(512),
nn.ReLU(inplace=True),
nn.LazyBatchNorm1d(),
nn.LazyLinear(num_classes)
)
def forward(self, x):
x = self.features(x)
x = self.classifier(x)
return x
def training_step(self, batch, batch_idx):
x, y = batch
logits = self(x)
loss = F.cross_entropy(logits, y)
acc = (logits.argmax(dim=1) == y).float().mean()
self.log('train_loss', loss, prog_bar=True, on_step=False, on_epoch=True)
self.log('train_acc', acc, prog_bar=True, on_step=False, on_epoch=True)
return loss
def validation_step(self, batch, batch_idx):
x, y = batch
logits = self(x)
loss = F.cross_entropy(logits, y)
acc = (logits.argmax(dim=1) == y).float().mean()
self.log('val_loss', loss, prog_bar=True, on_step=False, on_epoch=True)
self.log('val_acc', acc, prog_bar=True, on_step=False, on_epoch=True)
def configure_optimizers(self):
return optim.Adam(self.parameters(), lr=0.001)
# Training the VGG model without dropout but with data augmentation
csv_logger = CSVLogger(save_dir='logs/', name='VGGStyleAugmentation', version = "")
early_stop_callback = EarlyStopping(monitor='val_loss', patience=10, verbose=True, mode = "min")
model = VGGStyleAugmentation()
# Create a Trainer and pass the CSV logger and callbacks (early stopping and rich progress bar)
trainer = pl.Trainer(
max_epochs=25,
logger=csv_logger,
callbacks=[early_stop_callback]
)
trainer.fit(model, train_loader_augmented, val_loader)
# Save the final model state to the log directory using Lightning
trainer.save_checkpoint('logs/VGGStyleAugmentation/final_model.ckpt')
Files already downloaded and verified
/home/kmcalist/.local/lib/python3.10/site-packages/torch/nn/modules/lazy.py:180: UserWarning: Lazy modules are a new feature under heavy development so changes to the API or functionality can happen at any moment.
warnings.warn('Lazy modules are a new feature under heavy development '
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
| Name | Type | Params | Mode
--------------------------------------------------
0 | features | Sequential | 0 | train
1 | classifier | Sequential | 0 | train
--------------------------------------------------
0 Trainable params
0 Non-trainable params
0 Total params
0.000 Total estimated model params size (MB)
Sanity Checking: | | 0/? [00:00<?, ?it/s]
Training: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved. New best score: 1.058
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.210 >= min_delta = 0.0. New best score: 0.849
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.149 >= min_delta = 0.0. New best score: 0.700
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.695
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.125 >= min_delta = 0.0. New best score: 0.570
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.076 >= min_delta = 0.0. New best score: 0.494
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.059 >= min_delta = 0.0. New best score: 0.435
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.029 >= min_delta = 0.0. New best score: 0.407
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.017 >= min_delta = 0.0. New best score: 0.389
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.021 >= min_delta = 0.0. New best score: 0.368
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.367
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.365
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.363
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.361
Validation: | | 0/? [00:00<?, ?it/s]
`Trainer.fit` stopped: `max_epochs=25` reached.
In [28]:
from torchsummary import summary
# Load the model checkpoint
model = VGGStyleAugmentation.load_from_checkpoint('logs/VGGStyleAugmentation/final_model.ckpt')
# Move the model to the appropriate device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Print the summary of the model
summary(model, input_size=(3, 32, 32))
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 32, 32] 1,792
ReLU-2 [-1, 64, 32, 32] 0
BatchNorm2d-3 [-1, 64, 32, 32] 128
Conv2d-4 [-1, 64, 32, 32] 36,928
ReLU-5 [-1, 64, 32, 32] 0
BatchNorm2d-6 [-1, 64, 32, 32] 128
MaxPool2d-7 [-1, 64, 16, 16] 0
Conv2d-8 [-1, 128, 16, 16] 73,856
ReLU-9 [-1, 128, 16, 16] 0
BatchNorm2d-10 [-1, 128, 16, 16] 256
Conv2d-11 [-1, 128, 16, 16] 147,584
ReLU-12 [-1, 128, 16, 16] 0
BatchNorm2d-13 [-1, 128, 16, 16] 256
MaxPool2d-14 [-1, 128, 8, 8] 0
Conv2d-15 [-1, 256, 8, 8] 295,168
ReLU-16 [-1, 256, 8, 8] 0
BatchNorm2d-17 [-1, 256, 8, 8] 512
Conv2d-18 [-1, 256, 8, 8] 590,080
ReLU-19 [-1, 256, 8, 8] 0
BatchNorm2d-20 [-1, 256, 8, 8] 512
Conv2d-21 [-1, 256, 8, 8] 590,080
ReLU-22 [-1, 256, 8, 8] 0
BatchNorm2d-23 [-1, 256, 8, 8] 512
MaxPool2d-24 [-1, 256, 4, 4] 0
Conv2d-25 [-1, 512, 4, 4] 1,180,160
ReLU-26 [-1, 512, 4, 4] 0
BatchNorm2d-27 [-1, 512, 4, 4] 1,024
Conv2d-28 [-1, 512, 4, 4] 2,359,808
ReLU-29 [-1, 512, 4, 4] 0
BatchNorm2d-30 [-1, 512, 4, 4] 1,024
Conv2d-31 [-1, 512, 4, 4] 2,359,808
ReLU-32 [-1, 512, 4, 4] 0
BatchNorm2d-33 [-1, 512, 4, 4] 1,024
MaxPool2d-34 [-1, 512, 2, 2] 0
Flatten-35 [-1, 2048] 0
Linear-36 [-1, 512] 1,049,088
ReLU-37 [-1, 512] 0
BatchNorm1d-38 [-1, 512] 1,024
Linear-39 [-1, 512] 262,656
ReLU-40 [-1, 512] 0
BatchNorm1d-41 [-1, 512] 1,024
Linear-42 [-1, 10] 5,130
================================================================
Total params: 8,959,562
Trainable params: 8,959,562
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 6.46
Params size (MB): 34.18
Estimated Total Size (MB): 40.65
----------------------------------------------------------------
/home/kmcalist/.local/lib/python3.10/site-packages/torch/nn/modules/lazy.py:180: UserWarning: Lazy modules are a new feature under heavy development so changes to the API or functionality can happen at any moment.
warnings.warn('Lazy modules are a new feature under heavy development '
In [29]:
# Read the metrics.csv file for the VGGStyleAugmentation model
vggstyle_augmentation_df = process_csv_logger('logs/VGGStyleAugmentation/metrics.csv')
# Plot training and validation loss and accuracy for VGGStyleAugmentation model
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(vggstyle_augmentation_df['epoch'], vggstyle_augmentation_df['train_loss'], label='Training Loss')
plt.plot(vggstyle_augmentation_df['epoch'], vggstyle_augmentation_df['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('VGGStyleAugmentation Training and Validation Loss')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(vggstyle_augmentation_df['epoch'], vggstyle_augmentation_df['train_acc'], label='Training Accuracy')
plt.plot(vggstyle_augmentation_df['epoch'], vggstyle_augmentation_df['val_acc'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('VGGStyleAugmentation Training and Validation Accuracy')
plt.legend()
plt.tight_layout()
plt.show()
In [30]:
# Align the epochs and add NAs to the shorter one
max_epochs = max(alexlike_df['epoch'].max(), vggstyle_df['epoch'].max(), vggstyle_dropout_df['epoch'].max(), vggstyle_augmentation_df['epoch'].max())
# Reindex all dataframes to have the same number of epochs
alexlike_df_reindexed = alexlike_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
vggstyle_df_reindexed = vggstyle_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
vggstyle_dropout_df_reindexed = vggstyle_dropout_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
vggstyle_augmentation_df_reindexed = vggstyle_augmentation_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
# Plot the validation accuracy curves
plt.figure(figsize=(10, 5))
plt.plot(alexlike_df_reindexed['epoch'], alexlike_df_reindexed['val_acc'], label='AlexLike Validation Accuracy')
plt.plot(vggstyle_df_reindexed['epoch'], vggstyle_df_reindexed['val_acc'], label='VGGStyle Validation Accuracy')
plt.plot(vggstyle_dropout_df_reindexed['epoch'], vggstyle_dropout_df_reindexed['val_acc'], label='VGGStyleWithDropout Validation Accuracy')
plt.plot(vggstyle_augmentation_df_reindexed['epoch'], vggstyle_augmentation_df_reindexed['val_acc'], label='VGGStyleAugmentation Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Validation Accuracy')
plt.title('Validation Accuracy Comparison')
plt.legend()
plt.show()
ResNets¶
In [34]:
import torch
import pytorch_lightning as pl
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import CSVLogger
# Residual Block with explicit input channels
class ResidualBlock(nn.Module):
def __init__(self, in_channels, out_channels, stride=1):
super(ResidualBlock, self).__init__()
self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channels)
# If the input channels do not match the output channels or stride != 1, use a projection for the shortcut.
if stride != 1 or in_channels != out_channels:
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1,
stride=stride, bias=False),
nn.BatchNorm2d(out_channels)
)
else:
self.shortcut = nn.Identity()
def forward(self, x):
out = self.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
out = self.relu(out)
return out
class VGGResidual(pl.LightningModule):
def __init__(self, num_classes=10):
super(VGGResidual, self).__init__()
self.features = nn.Sequential(
# Block 1: input from image (3 channels) -> 64 channels
ResidualBlock(in_channels=3, out_channels=64, stride=1),
ResidualBlock(in_channels=64, out_channels=64, stride=1),
nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
# Block 2: 64 -> 128 channels; use stride=2 for downsampling in the first block
ResidualBlock(in_channels=64, out_channels=128, stride=1),
ResidualBlock(in_channels=128, out_channels=128, stride=1),
nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
# Block 3: 128 -> 256 channels
ResidualBlock(in_channels=128, out_channels=256, stride=1),
ResidualBlock(in_channels=256, out_channels=256, stride=1),
ResidualBlock(in_channels=256, out_channels=256, stride=1),
nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
# Block 4: 256 -> 512 channels
ResidualBlock(in_channels=256, out_channels=512, stride=1),
ResidualBlock(in_channels=512, out_channels=512, stride=1),
ResidualBlock(in_channels=512, out_channels=512, stride=1),
nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
)
# Classifier using lazy layers (input dimension is inferred)
self.classifier = nn.Sequential(
nn.AdaptiveAvgPool2d((1, 1)),
nn.Flatten(),
nn.LazyLinear(num_classes)
)
def forward(self, x):
x = self.features(x)
x = self.classifier(x)
return x
def training_step(self, batch, batch_idx):
x, y = batch
logits = self(x)
loss = F.cross_entropy(logits, y)
acc = (logits.argmax(dim=1) == y).float().mean()
self.log('train_loss', loss, prog_bar=True, on_step=False, on_epoch=True)
self.log('train_acc', acc, prog_bar=True, on_step=False, on_epoch=True)
return loss
def validation_step(self, batch, batch_idx):
x, y = batch
logits = self(x)
loss = F.cross_entropy(logits, y)
acc = (logits.argmax(dim=1) == y).float().mean()
self.log('val_loss', loss, prog_bar=True, on_step=False, on_epoch=True)
self.log('val_acc', acc, prog_bar=True, on_step=False, on_epoch=True)
def configure_optimizers(self):
return optim.Adam(self.parameters(), lr=0.001)
# Set up logging and early stopping
csv_logger = CSVLogger(save_dir='logs/', name='VGGResidual', version="")
early_stop_callback = EarlyStopping(monitor='val_loss', patience=10, verbose=True, mode="min")
# Create the model instance
model = VGGResidual()
# Assume train_loader and val_loader are defined DataLoaders
trainer = pl.Trainer(
max_epochs=25,
logger=csv_logger,
callbacks=[early_stop_callback]
)
trainer.fit(model, train_loader, val_loader)
# Save the final model state
trainer.save_checkpoint('logs/VGGResidual/final_model.ckpt')
GPU available: True (cuda), used: True TPU available: False, using: 0 TPU cores HPU available: False, using: 0 HPUs /home/kmcalist/.local/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:652: Checkpoint directory logs/VGGResidual/checkpoints exists and is not empty. LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] | Name | Type | Params | Mode -------------------------------------------------- 0 | features | Sequential | 17.0 M | train 1 | classifier | Sequential | 0 | train -------------------------------------------------- 17.0 M Trainable params 0 Non-trainable params 17.0 M Total params 68.134 Total estimated model params size (MB)
Sanity Checking: | | 0/? [00:00<?, ?it/s]
Training: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved. New best score: 1.443
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.781 >= min_delta = 0.0. New best score: 0.662
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.032 >= min_delta = 0.0. New best score: 0.630
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.108 >= min_delta = 0.0. New best score: 0.522
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.036 >= min_delta = 0.0. New best score: 0.486
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Monitored metric val_loss did not improve in the last 10 records. Best score: 0.486. Signaling Trainer to stop.
In [35]:
from torchsummary import summary
# Load the model checkpoint
model = VGGResidual.load_from_checkpoint('logs/VGGResidual/final_model.ckpt')
# Move the model to the appropriate device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Print the summary of the model
summary(model, input_size=(3, 32, 32))
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 32, 32] 1,728
BatchNorm2d-2 [-1, 64, 32, 32] 128
ReLU-3 [-1, 64, 32, 32] 0
Conv2d-4 [-1, 64, 32, 32] 36,864
BatchNorm2d-5 [-1, 64, 32, 32] 128
Conv2d-6 [-1, 64, 32, 32] 192
BatchNorm2d-7 [-1, 64, 32, 32] 128
ReLU-8 [-1, 64, 32, 32] 0
ResidualBlock-9 [-1, 64, 32, 32] 0
Conv2d-10 [-1, 64, 32, 32] 36,864
BatchNorm2d-11 [-1, 64, 32, 32] 128
ReLU-12 [-1, 64, 32, 32] 0
Conv2d-13 [-1, 64, 32, 32] 36,864
BatchNorm2d-14 [-1, 64, 32, 32] 128
Identity-15 [-1, 64, 32, 32] 0
ReLU-16 [-1, 64, 32, 32] 0
ResidualBlock-17 [-1, 64, 32, 32] 0
MaxPool2d-18 [-1, 64, 16, 16] 0
Conv2d-19 [-1, 128, 16, 16] 73,728
BatchNorm2d-20 [-1, 128, 16, 16] 256
ReLU-21 [-1, 128, 16, 16] 0
Conv2d-22 [-1, 128, 16, 16] 147,456
BatchNorm2d-23 [-1, 128, 16, 16] 256
Conv2d-24 [-1, 128, 16, 16] 8,192
BatchNorm2d-25 [-1, 128, 16, 16] 256
ReLU-26 [-1, 128, 16, 16] 0
ResidualBlock-27 [-1, 128, 16, 16] 0
Conv2d-28 [-1, 128, 16, 16] 147,456
BatchNorm2d-29 [-1, 128, 16, 16] 256
ReLU-30 [-1, 128, 16, 16] 0
Conv2d-31 [-1, 128, 16, 16] 147,456
BatchNorm2d-32 [-1, 128, 16, 16] 256
Identity-33 [-1, 128, 16, 16] 0
ReLU-34 [-1, 128, 16, 16] 0
ResidualBlock-35 [-1, 128, 16, 16] 0
MaxPool2d-36 [-1, 128, 8, 8] 0
Conv2d-37 [-1, 256, 8, 8] 294,912
BatchNorm2d-38 [-1, 256, 8, 8] 512
ReLU-39 [-1, 256, 8, 8] 0
Conv2d-40 [-1, 256, 8, 8] 589,824
BatchNorm2d-41 [-1, 256, 8, 8] 512
Conv2d-42 [-1, 256, 8, 8] 32,768
BatchNorm2d-43 [-1, 256, 8, 8] 512
ReLU-44 [-1, 256, 8, 8] 0
ResidualBlock-45 [-1, 256, 8, 8] 0
Conv2d-46 [-1, 256, 8, 8] 589,824
BatchNorm2d-47 [-1, 256, 8, 8] 512
ReLU-48 [-1, 256, 8, 8] 0
Conv2d-49 [-1, 256, 8, 8] 589,824
BatchNorm2d-50 [-1, 256, 8, 8] 512
Identity-51 [-1, 256, 8, 8] 0
ReLU-52 [-1, 256, 8, 8] 0
ResidualBlock-53 [-1, 256, 8, 8] 0
Conv2d-54 [-1, 256, 8, 8] 589,824
BatchNorm2d-55 [-1, 256, 8, 8] 512
ReLU-56 [-1, 256, 8, 8] 0
Conv2d-57 [-1, 256, 8, 8] 589,824
BatchNorm2d-58 [-1, 256, 8, 8] 512
Identity-59 [-1, 256, 8, 8] 0
ReLU-60 [-1, 256, 8, 8] 0
ResidualBlock-61 [-1, 256, 8, 8] 0
MaxPool2d-62 [-1, 256, 4, 4] 0
Conv2d-63 [-1, 512, 4, 4] 1,179,648
BatchNorm2d-64 [-1, 512, 4, 4] 1,024
ReLU-65 [-1, 512, 4, 4] 0
Conv2d-66 [-1, 512, 4, 4] 2,359,296
BatchNorm2d-67 [-1, 512, 4, 4] 1,024
Conv2d-68 [-1, 512, 4, 4] 131,072
BatchNorm2d-69 [-1, 512, 4, 4] 1,024
ReLU-70 [-1, 512, 4, 4] 0
ResidualBlock-71 [-1, 512, 4, 4] 0
Conv2d-72 [-1, 512, 4, 4] 2,359,296
BatchNorm2d-73 [-1, 512, 4, 4] 1,024
ReLU-74 [-1, 512, 4, 4] 0
Conv2d-75 [-1, 512, 4, 4] 2,359,296
BatchNorm2d-76 [-1, 512, 4, 4] 1,024
Identity-77 [-1, 512, 4, 4] 0
ReLU-78 [-1, 512, 4, 4] 0
ResidualBlock-79 [-1, 512, 4, 4] 0
Conv2d-80 [-1, 512, 4, 4] 2,359,296
BatchNorm2d-81 [-1, 512, 4, 4] 1,024
ReLU-82 [-1, 512, 4, 4] 0
Conv2d-83 [-1, 512, 4, 4] 2,359,296
BatchNorm2d-84 [-1, 512, 4, 4] 1,024
Identity-85 [-1, 512, 4, 4] 0
ReLU-86 [-1, 512, 4, 4] 0
ResidualBlock-87 [-1, 512, 4, 4] 0
MaxPool2d-88 [-1, 512, 2, 2] 0
AdaptiveAvgPool2d-89 [-1, 512, 1, 1] 0
Flatten-90 [-1, 512] 0
Linear-91 [-1, 10] 5,130
================================================================
Total params: 17,038,602
Trainable params: 17,038,602
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 17.68
Params size (MB): 65.00
Estimated Total Size (MB): 82.69
----------------------------------------------------------------
/home/kmcalist/.local/lib/python3.10/site-packages/torch/nn/modules/lazy.py:180: UserWarning: Lazy modules are a new feature under heavy development so changes to the API or functionality can happen at any moment.
warnings.warn('Lazy modules are a new feature under heavy development '
In [36]:
# Read the metrics.csv file for the VGGResidual model
vggresidual_df = process_csv_logger('logs/VGGResidual/metrics.csv')
# Plot training and validation loss and accuracy for VGGResidual model
plt.figure(figsize=(12, 10))
# Plot training and validation loss
plt.subplot(2, 1, 1)
plt.plot(vggresidual_df['epoch'], vggresidual_df['train_loss'], label='Training Loss')
plt.plot(vggresidual_df['epoch'], vggresidual_df['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('VGGResidual Training and Validation Loss')
plt.legend()
# Plot training and validation accuracy
plt.subplot(2, 1, 2)
plt.plot(vggresidual_df['epoch'], vggresidual_df['train_acc'], label='Training Accuracy')
plt.plot(vggresidual_df['epoch'], vggresidual_df['val_acc'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('VGGResidual Training and Validation Accuracy')
plt.legend()
plt.tight_layout()
plt.show()
In [37]:
# Align the epochs and add NAs to the shorter one
max_epochs = max(alexlike_df['epoch'].max(), vggstyle_df['epoch'].max(), vggstyle_dropout_df['epoch'].max(), vggstyle_augmentation_df['epoch'].max(), vggresidual_df['epoch'].max())
# Reindex all dataframes to have the same number of epochs
alexlike_df_reindexed = alexlike_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
vggstyle_df_reindexed = vggstyle_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
vggstyle_dropout_df_reindexed = vggstyle_dropout_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
vggstyle_augmentation_df_reindexed = vggstyle_augmentation_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
vggresidual_df_reindexed = vggresidual_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
# Plot the validation accuracy curves
plt.figure(figsize=(10, 5))
plt.plot(alexlike_df_reindexed['epoch'], alexlike_df_reindexed['val_acc'], label='AlexLike Validation Accuracy')
plt.plot(vggstyle_df_reindexed['epoch'], vggstyle_df_reindexed['val_acc'], label='VGGStyle Validation Accuracy')
plt.plot(vggstyle_dropout_df_reindexed['epoch'], vggstyle_dropout_df_reindexed['val_acc'], label='VGGStyleWithDropout Validation Accuracy')
plt.plot(vggstyle_augmentation_df_reindexed['epoch'], vggstyle_augmentation_df_reindexed['val_acc'], label='VGGStyleAugmentation Validation Accuracy')
plt.plot(vggresidual_df_reindexed['epoch'], vggresidual_df_reindexed['val_acc'], label='VGGResidual Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Validation Accuracy')
plt.title('Validation Accuracy Comparison')
plt.legend()
plt.show()
ResNet50 via Transfer Learning¶
In [41]:
import torchvision.models as models
resnet152 = models.resnet152(pretrained=True)
print(resnet152)
/home/kmcalist/.local/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. warnings.warn( /home/kmcalist/.local/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet152_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet152_Weights.DEFAULT` to get the most up-to-date weights. warnings.warn(msg) Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /home/kmcalist/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth 100%|██████████| 230M/230M [00:04<00:00, 59.0MB/s]
ResNet(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
(0): Bottleneck(
(conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Bottleneck(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(2): Bottleneck(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(layer2): Sequential(
(0): Bottleneck(
(conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(2): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(3): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(4): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(5): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(6): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(7): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(layer3): Sequential(
(0): Bottleneck(
(conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(2): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(3): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(4): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(5): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(6): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(7): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(8): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(9): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(10): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(11): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(12): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(13): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(14): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(15): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(16): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(17): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(18): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(19): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(20): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(21): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(22): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(23): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(24): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(25): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(26): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(27): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(28): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(29): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(30): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(31): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(32): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(33): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(34): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(35): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(layer4): Sequential(
(0): Bottleneck(
(conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Bottleneck(
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(2): Bottleneck(
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
(fc): Linear(in_features=2048, out_features=1000, bias=True)
)
In [43]:
# First, need to resize images to match ResNet pretrain size
from torchvision import transforms
import torchvision
# Define a transform for training that includes resizing and optional augmentation
train_transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor()
])
# Define a transform for training that includes resizing and optional augmentation
val_transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor()
])
# Create the CIFAR-10 datasets with the corresponding transforms
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
val_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=val_transform)
# Create data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)
Files already downloaded and verified Files already downloaded and verified
In [46]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
import torchvision.models as models
import torch.optim as optim
class ResNet152FineTune(pl.LightningModule):
def __init__(self, num_classes=10):
super().__init__()
# Load a pretrained ResNet50
self.model = models.resnet152(pretrained=True)
# Freeze all layers
for param in self.model.parameters():
param.requires_grad = False
# Unfreeze the top convolutional block (layer4)
for param in self.model.layer4.parameters():
param.requires_grad = True
# Replace the fully connected classifier with a 2-layer FCNN.
# The original fc layer's input dimension is preserved.
in_features = self.model.fc.in_features
self.model.fc = nn.Sequential(
nn.Flatten(),
nn.LazyLinear(512),
nn.ReLU(inplace=True),
nn.LazyBatchNorm1d(),
nn.LazyLinear(512),
nn.ReLU(inplace=True),
nn.LazyBatchNorm1d(),
nn.LazyLinear(num_classes)
)
def forward(self, x):
return self.model(x)
def training_step(self, batch, batch_idx):
x, y = batch
logits = self(x)
loss = F.cross_entropy(logits, y)
acc = (logits.argmax(dim=1) == y).float().mean()
self.log('train_loss', loss, prog_bar=True, on_epoch=True)
self.log('train_acc', acc, prog_bar=True, on_epoch=True)
return loss
def validation_step(self, batch, batch_idx):
x, y = batch
logits = self(x)
loss = F.cross_entropy(logits, y)
acc = (logits.argmax(dim=1) == y).float().mean()
self.log('val_loss', loss, prog_bar=True, on_epoch=True)
self.log('val_acc', acc, prog_bar=True, on_epoch=True)
def configure_optimizers(self):
# Only layer4 and the new FC layers will have gradients.
return optim.Adam(self.parameters(), lr=1e-3)
# Set up logging and early stopping
csv_logger = CSVLogger(save_dir='logs/', name='ResNet152FineTune', version="")
early_stop_callback = EarlyStopping(monitor='val_loss', patience=5, verbose=True, mode="min")
# Create the model instance
model = ResNet152FineTune(num_classes=10)
# Assume train_loader and val_loader are defined DataLoaders
trainer = pl.Trainer(
max_epochs=5,
logger=csv_logger,
callbacks=[early_stop_callback]
)
trainer.fit(model, train_loader, val_loader)
# Save the final model state
trainer.save_checkpoint('logs/ResNet152FineTune/final_model.ckpt')
GPU available: True (cuda), used: True TPU available: False, using: 0 TPU cores HPU available: False, using: 0 HPUs LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] | Name | Type | Params | Mode ----------------------------------------- 0 | model | ResNet | 58.1 M | train ----------------------------------------- 15.0 M Trainable params 43.2 M Non-trainable params 58.1 M Total params 232.575 Total estimated model params size (MB)
Sanity Checking: | | 0/? [00:00<?, ?it/s]
Training: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved. New best score: 0.315
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.052 >= min_delta = 0.0. New best score: 0.262
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Validation: | | 0/? [00:00<?, ?it/s]
Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.260 `Trainer.fit` stopped: `max_epochs=5` reached.
In [49]:
# Read the metrics.csv file for the ResNet152FineTune model
resnet152_finetune_df = process_csv_logger('logs/ResNet152FineTune/metrics.csv')
# Plot training and validation loss and accuracy for ResNet152FineTune model
plt.figure(figsize=(12, 10))
# Plot training and validation loss
plt.subplot(2, 1, 1)
plt.plot(resnet152_finetune_df['epoch'], resnet152_finetune_df['train_loss_epoch'], label='Training Loss')
plt.plot(resnet152_finetune_df['epoch'], resnet152_finetune_df['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('ResNet152FineTune Training and Validation Loss')
plt.legend()
# Plot training and validation accuracy
plt.subplot(2, 1, 2)
plt.plot(resnet152_finetune_df['epoch'], resnet152_finetune_df['train_acc_epoch'], label='Training Accuracy')
plt.plot(resnet152_finetune_df['epoch'], resnet152_finetune_df['val_acc'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('ResNet152FineTune Training and Validation Accuracy')
plt.legend()
plt.tight_layout()
plt.show()
In [50]:
# Align the epochs and add NAs to the shorter one
max_epochs = max(alexlike_df['epoch'].max(), vggstyle_df['epoch'].max(), vggstyle_dropout_df['epoch'].max(), vggstyle_augmentation_df['epoch'].max(), vggresidual_df['epoch'].max(), resnet152_finetune_df['epoch'].max())
# Reindex all dataframes to have the same number of epochs
alexlike_df_reindexed = alexlike_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
vggstyle_df_reindexed = vggstyle_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
vggstyle_dropout_df_reindexed = vggstyle_dropout_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
vggstyle_augmentation_df_reindexed = vggstyle_augmentation_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
vggresidual_df_reindexed = vggresidual_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
resnet152_finetune_df_reindexed = resnet152_finetune_df.set_index('epoch').reindex(range(max_epochs + 1)).reset_index()
# Plot the validation accuracy curves
plt.figure(figsize=(10, 5))
plt.plot(alexlike_df_reindexed['epoch'], alexlike_df_reindexed['val_acc'], label='AlexLike Validation Accuracy')
plt.plot(vggstyle_df_reindexed['epoch'], vggstyle_df_reindexed['val_acc'], label='VGGStyle Validation Accuracy')
plt.plot(vggstyle_dropout_df_reindexed['epoch'], vggstyle_dropout_df_reindexed['val_acc'], label='VGGStyleWithDropout Validation Accuracy')
plt.plot(vggstyle_augmentation_df_reindexed['epoch'], vggstyle_augmentation_df_reindexed['val_acc'], label='VGGStyleAugmentation Validation Accuracy')
plt.plot(vggresidual_df_reindexed['epoch'], vggresidual_df_reindexed['val_acc'], label='VGGResidual Validation Accuracy')
plt.plot(resnet152_finetune_df_reindexed['epoch'], resnet152_finetune_df_reindexed['val_acc'], label='ResNet152FineTune Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Validation Accuracy')
plt.title('Validation Accuracy Comparison')
plt.legend()
plt.show()